This is the instruction for the codes of UCL MSc CEGE0042 coursework. The aim of this coursework is to conduct an exploratory spatio-temporal analysis of Vancouver crime data and make predictions.
Most of the code is derived from Dr James’ book Spatio-temporal Analytics in R.
Data source: Crime in Vancouver, Vancouver district.
Directories in the coursework will point to a folder called ‘Coursework’. Please create this folder somewhere in your computer and use ‘setwd’ to set the working directory to point to it. For example, if ‘Coursework’ is placed in ‘/Users’ then run setwd(‘/Users/Coursework’).
This notebook is divided into 3 parts:
- Part 1: Data Import and Pre-processing
- Part 2: Data Visualization & Autocorrelation
- Part 3: ARIMA & ANN
First check and change working directory using getwd() & setwd(). Import Vancouver Crime data and select interested attributes. Using head() to see the structure of the data. Then select the data we gonna to deal with.
# check and change working directory using getwd() & setwd()
getwd()
[1] "/Users/zmxu/Desktop/UCL MSc Geospatial Sciences/term2-周2-CEGE0042-Spatial-Temporal Data Analysis and Data Mining (STDM) /Coursework"
setwd("/Users/zmxu/Desktop/UCL MSc Geospatial Sciences/term2-周2-CEGE0042-Spatial-Temporal Data Analysis and Data Mining (STDM) /Coursework")
# import Vancouver Crime data
data <- read.csv("archive/crime.csv")
head(data,10)
# select interested attributes
crime <- data.frame(data[,c("TYPE","YEAR","MONTH","NEIGHBOURHOOD","Latitude","Longitude")])
There might be some recording errors in the data set. Clean the data set in case there are NA & 0, and output the first several lines.
crime[crime==0] <- NA
crime <- na.omit(crime)
print(nrow(crime))
[1] 476290
head(crime,10)
In order to read crime data by different type and neighbourhood, a zero-value matrix is created to hold the data, and the number of different types of crime is counted and written to the matrix by cyclically filtering the crime events that meet the conditions.
Actually the TYPE of crime was also selected, in order to know more about the data and would be helpful to the further work.
# check different TYEP & NEIGHBOURHOOD
unique(crime$TYPE)
[1] "Other Theft" "Break and Enter Residential/Other"
[3] "Mischief" "Break and Enter Commercial"
[5] "Theft from Vehicle" "Vehicle Collision or Pedestrian Struck (with Injury)"
[7] "Vehicle Collision or Pedestrian Struck (with Fatality)" "Theft of Vehicle"
[9] "Theft of Bicycle"
unique(crime$NEIGHBOURHOOD)
[1] "Strathcona" "Kerrisdale" "Dunbar-Southlands" "Grandview-Woodland"
[5] "Sunset" "West End" "Central Business District" "Hastings-Sunrise"
[9] "Victoria-Fraserview" "Fairview" "Kensington-Cedar Cottage" "West Point Grey"
[13] "Shaughnessy" "Renfrew-Collingwood" "Killarney" "Riley Park"
[17] "Arbutus Ridge" "Musqueam" "Mount Pleasant" "Kitsilano"
[21] "Stanley Park" "South Cambie" "Marpole" "Oakridge"
[25] ""
# create year_month matrix (might introduce 0)
# all crimes
year_month <- matrix(data=0,nrow=15,ncol=12)
# group by crime TYPE
year_month_M <- matrix(data=0,nrow=15,ncol=12)
year_month_OT <- matrix(data=0,nrow=15,ncol=12)
year_month_ToV <- matrix(data=0,nrow=15,ncol=12)
year_month_TfV <- matrix(data=0,nrow=15,ncol=12)
year_month_ToB <- matrix(data=0,nrow=15,ncol=12)
year_month_BEC <- matrix(data=0,nrow=15,ncol=12)
year_month_BERO <- matrix(data=0,nrow=15,ncol=12)
year_month_VCPSI <- matrix(data=0,nrow=15,ncol=12)
year_month_VCPSF <- matrix(data=0,nrow=15,ncol=12)
# group by NEIGHBOURHOOD
year_month_Strathcona <- matrix(data=0,nrow=15,ncol=12)
year_month_Kerrisdale <- matrix(data=0,nrow=15,ncol=12)
year_month_DunbarSouthlands <- matrix(data=0,nrow=15,ncol=12)
year_month_GrandviewWoodland <- matrix(data=0,nrow=15,ncol=12)
year_month_Sunset <- matrix(data=0,nrow=15,ncol=12)
year_month_WestEnd <- matrix(data=0,nrow=15,ncol=12)
year_month_CentralBusinessDistrict <- matrix(data=0,nrow=15,ncol=12)
year_month_HastingsSunrise <- matrix(data=0,nrow=15,ncol=12)
year_month_VictoriaFraserview <- matrix(data=0,nrow=15,ncol=12)
year_month_Fairview <- matrix(data=0,nrow=15,ncol=12)
year_month_KensingtonCedarCottage <- matrix(data=0,nrow=15,ncol=12)
year_month_WestPointGrey <- matrix(data=0,nrow=15,ncol=12)
year_month_Shaughnessy <- matrix(data=0,nrow=15,ncol=12)
year_month_RenfrewCollingwood <- matrix(data=0,nrow=15,ncol=12)
year_month_Killarney <- matrix(data=0,nrow=15,ncol=12)
year_month_RileyPark <- matrix(data=0,nrow=15,ncol=12)
year_month_ArbutusRidge <- matrix(data=0,nrow=15,ncol=12)
year_month_Musqueam <- matrix(data=0,nrow=15,ncol=12)
year_month_MountPleasant <- matrix(data=0,nrow=15,ncol=12)
year_month_Kitsilano <- matrix(data=0,nrow=15,ncol=12)
year_month_StanleyPark <- matrix(data=0,nrow=15,ncol=12)
year_month_SouthCambie <- matrix(data=0,nrow=15,ncol=12)
year_month_Marpole <- matrix(data=0,nrow=15,ncol=12)
year_month_Oakridge <- matrix(data=0,nrow=15,ncol=12)
year <- c(2003:2017)
month <- c(1:12)
for (i in year){
for (j in month){
# all crimes
year_month[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j))
# group by TYPE
year_month_M[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Mischief"))
year_month_OT[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Other Theft"))
year_month_ToV[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Theft of Vehicle"))
year_month_TfV[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Theft from Vehicle"))
year_month_ToB[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Theft of Bicycle"))
year_month_BEC[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Break and Enter Commercial"))
year_month_BERO[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Break and Enter Residential/Other"))
year_month_VCPSI[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Vehicle Collision or Pedestrian Struck (with Injury)"))
year_month_VCPSF[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & TYPE=="Vehicle Collision or Pedestrian Struck (with Fatality)"))
# group by NEIGHBOURHOOD
year_month_Strathcona[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Strathcona"))
year_month_Kerrisdale[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Kerrisdale"))
year_month_DunbarSouthlands[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Dunbar-Southlands"))
year_month_GrandviewWoodland[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Grandview-Woodland"))
year_month_Sunset[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Sunset"))
year_month_WestEnd[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="West End"))
year_month_CentralBusinessDistrict[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Central Business District"))
year_month_HastingsSunrise[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Hastings-Sunrise"))
year_month_VictoriaFraserview[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Victoria-Fraserview"))
year_month_Fairview[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Fairview"))
year_month_KensingtonCedarCottage[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Kensington-Cedar Cottage"))
year_month_WestPointGrey[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="West Point Grey"))
year_month_Shaughnessy[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Shaughnessy"))
year_month_RenfrewCollingwood[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Renfrew-Collingwood"))
year_month_Killarney[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Killarney"))
year_month_RileyPark[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Riley Park"))
year_month_ArbutusRidge[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Arbutus Ridge"))
year_month_Musqueam[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Musqueam"))
year_month_MountPleasant[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Mount Pleasant"))
year_month_Kitsilano[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Kitsilano"))
year_month_StanleyPark[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Stanley Park"))
year_month_SouthCambie[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="South Cambie"))
year_month_Marpole[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Marpole"))
year_month_Oakridge[i-2002,j] <- nrow(subset(crime, YEAR==i & MONTH==j & NEIGHBOURHOOD=="Oakridge"))
}
}
# all Crime - change row&col name
colnames(year_month)[1:ncol(year_month)] <- as.character(c(1:12))
rownames(year_month)[1:nrow(year_month)] <- as.character(c(2003:2017))
The matrix is transformed into a time series and intercepted up to the first 175 items for which there are actual observations.
# covert year_month matrix to month series (delete 0)
# all crime
time_series_long <- as.vector(t(year_month))
time_series <- time_series_long[1:175]
# group by TYPE
time_series_long <- as.vector(t(year_month_M)) # TYPE=="Mischief"
time_series_M <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_OT)) # TYPE=="Other Theft"
time_series_OT <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_ToV)) # TYPE=="Theft of Vehicle"
time_series_ToV <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_TfV)) # TYPE=="Theft from Vehicle"
time_series_TfV <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_ToB)) # TYPE=="Theft of Bicycle"
time_series_ToB <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_BEC)) # TYPE=="Break and Enter Commercial"
time_series_BEC <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_BERO)) # TYPE=="Break and Enter Residential/Other"
time_series_BERO <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_VCPSI)) # TYPE=="Vehicle Collision or Pedestrian Struck (with Injury)"
time_series_VCPSI <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_VCPSF)) # TYPE=="Vehicle Collision or Pedestrian Struck (with Fatality)"
time_series_VCPSF <- time_series_long[1:175]
# group by NEIGHBOURHOOD
time_series_long <- as.vector(t(year_month_Strathcona))
time_series_Strathcona <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Kerrisdale))
time_series_Kerrisdale <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_DunbarSouthlands))
time_series_DunbarSouthlands <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_GrandviewWoodland))
time_series_GrandviewWoodland <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Sunset))
time_series_Sunset <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_WestEnd))
time_series_WestEnd <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_CentralBusinessDistrict))
time_series_CentralBusinessDistrict <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_HastingsSunrise))
time_series_HastingsSunrise <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_VictoriaFraserview))
time_series_VictoriaFraserview <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Fairview))
time_series_Fairview <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_KensingtonCedarCottage))
time_series_KensingtonCedarCottage <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_WestPointGrey))
time_series_WestPointGrey <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Shaughnessy))
time_series_Shaughnessy <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_RenfrewCollingwood))
time_series_RenfrewCollingwood <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Killarney))
time_series_Killarney <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_RileyPark))
time_series_RileyPark <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_ArbutusRidge))
time_series_ArbutusRidge <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Musqueam))
time_series_Musqueam <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_MountPleasant))
time_series_MountPleasant <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Kitsilano))
time_series_Kitsilano <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_StanleyPark))
time_series_StanleyPark <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_SouthCambie))
time_series_SouthCambie <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Marpole))
time_series_Marpole <- time_series_long[1:175]
time_series_long <- as.vector(t(year_month_Oakridge))
time_series_Oakridge <- time_series_long[1:175]
Combine the data of different neighbourhoods into a matrix, rewrite rownames according to the neighbourhood.shp file,rewrite colnames by month, and output the result as a .csv file.
# combine all district data to one matrix
time_series_by_neighbourhood <- rbind(time_series_Strathcona,time_series_Kerrisdale,time_series_DunbarSouthlands,
time_series_GrandviewWoodland,time_series_Sunset,time_series_WestEnd,
time_series_CentralBusinessDistrict,time_series_HastingsSunrise,time_series_VictoriaFraserview,
time_series_Fairview,time_series_KensingtonCedarCottage,time_series_WestPointGrey,
time_series_Shaughnessy,time_series_RenfrewCollingwood,time_series_Killarney,
time_series_RileyPark,time_series_ArbutusRidge,time_series_Musqueam,
time_series_MountPleasant,time_series_Kitsilano,time_series_StanleyPark,
time_series_SouthCambie,time_series_Marpole,time_series_Oakridge)
# rename district name according to .shp file
rownames(time_series_by_neighbourhood)[1:nrow(time_series_by_neighbourhood)] <- as.character(c("Strathcona","Kerrisdale","Dunbar-Southlands",
"Grandview-Woodland","Sunset","West End",
"Downtown","Hastings-Sunrise","Victoria-Fraserview",
"Fairview","Kensington-Cedar Cottage","West Point Grey",
"Shaughnessy","Renfrew-Collingwood","Killarney",
"Riley Park","Arbutus-Ridge", "Musqueam",
"Mount Pleasant","Kitsilano", "StanleyPark",
"South Cambie","Marpole","Oakridge"))
# set month number
Month.number <- paste("Month ",1:175,sep="")
colnames(time_series_by_neighbourhood) <- as.character(Month.number)
# output to .csv, in order to link in ArcGIS
write.csv(x = time_series_by_neighbourhood,file = "time_series_by_neighbourhood.csv")
There are 2 steps needs to be done before continue the codes:
1. Link the rime data in “time_series_by_neighbourhood.csv” to neighborhood data (.shp) by ArcGIS software.
2. Use the output to substitute the “local_area_boundary.dbf” file in the “Vancouver-local-area-boundary” folder.
After that, continue the following codes.
Actually, “local_area_boundary.dbf” file in the “Vancouver-local-area-boundary” folder has already linked by the author in advance, so you can continue the work now.
After pre-processing operations such as loading, arranging and cleaning the data, the temporal distribution of crime incidents in Vancouver from 2003.1.1 to 2017.7.13 could be plotted and basic mathematical statistical information could be presented.
This part of code will produce Figure 2.1 Vancouver Crime Time Series and Statistical Information
par(mfrow=c(1,3))
# Time series plot
plot(time_series, xlab = "Year", ylab = "Crime Number", type="l", xaxt="n", main="Time series of Crime by Month")
axis(1, at=seq(1,180,12), labels=seq(2003,2017,1))
# Histogram
mu = mean(time_series)
hist(time_series, main="Histogram of Crime (Monthly)")
abline(v=mu, col="red")
# Q-Q plot
qqnorm(time_series, main="Normal Q-Q Plot of Crime (Monthly)")
qqline(time_series, col="red")
First load some packages. Install if necessary.
library(ggplot2)
library(OpenStreetMap)
library(raster)
library(ggmap)
library(osmdata)
Crime in the dataset is recorded as a scatter and the 2003.10 data has been selected for presentation. In addition, it is possible to obtain the crime density of the different subdivisions.
This part of code will produce Figure 1.1 Spatial Distribution of Crime in Vancouver (2003.10)
par(mfrow=c(1,1))
# Figure: Crime in Vancouver, 2003.10
# add crime data: eg.2016 data
crime_data <- data.frame(subset(crime, YEAR==2003 & MONTH==10))
# plot Vancouver map
mad_map <- get_map(getbb("Vancouver"), maptype = "toner-background")
maptype = "toner-background" is only available with source = "stamen".
resetting to source = "stamen"...
ggmap(mad_map)+
geom_point (data=crime_data, aes(x=Longitude,y=Latitude), color = "#BA4A00",size=0.3)+
ggtitle("Crime in Vancouver, 2003.10")+
labs(x = "LON", y = "LAT")
# Figure: Crime distribution in 2003.10
library(rgdal)
library(tmap)
vancouver_districts <- readOGR(dsn="Vancouver-local-area-boundary/local_area_boundary.shp") # local_area_boundary.shp has already linked crime data
OGR data source with driver: ESRI Shapefile
Source: "/Users/zmxu/Desktop/UCL MSc Geospatial Sciences/term2-周2-CEGE0042-Spatial-Temporal Data Analysis and Data Mining (STDM) /Coursework/Vancouver-local-area-boundary/local_area_boundary.shp", layer: "local_area_boundary"
with 22 features
It has 178 fields
#tmap_style("cobalt") # set style
#tmap_style("white") # back to original
tm_shape(vancouver_districts)+
tm_fill("Month_10", style="jenks", palette="Blues")+
tm_borders("white")+
tm_compass(position=c("left","top"))+
tm_scale_bar(position=c(0.6,0.005))
Warning in sp::proj4string(obj) :
CRS object has comment, which is lost in output; in tests, see
https://cran.r-project.org/web/packages/sp/vignettes/CRS_warnings.html
More in-depth exploration of crime data was conducted to calculate temporal autocorrelations for different time scales and different sub-districts. For the time scales, the time series of Vancouver in terms of months and years were selected to calculate ACF and PACF. Two neighbourhoods, Downtown and Marpole, where the number of crime incidents differed significantly, were selected for comparison.
This part of code will produce Figure 2.2 Temporal autocorrelation: ACF&PACF of different time and space scales
Downtown <- time_series_by_neighbourhood[7,1:175]
Marpole <- time_series_by_neighbourhood[23,1:175]
par(mfrow=c(2,4))
acf(colMeans(matrix(time_series,12)), main="ACF, Annual")
Warning in matrix(time_series, 12) :
数据长度[175]不是矩阵行数[12]的整倍
acf(time_series,lag.max=35, main="ACF, Monthly")
acf(Downtown,lag.max=35, main="ACF, Monthly, Downtown")
acf(Marpole,lag.max=35, main="ACF, Monthly, Marpole")
pacf(colMeans(matrix(time_series,12)), main="PACF, Annual")
Warning in matrix(time_series, 12) :
数据长度[175]不是矩阵行数[12]的整倍
pacf(time_series,lag.max=35, main="PACF, Monthly")
pacf(Downtown,lag.max=35, main="PACF, Monthly, Downtown")
pacf(Marpole,lag.max=35, main="PACF, Monthly, Marpole")
Use Moran’s I to evaluate Global Spatial Autocorrelation.
library(spdep)
W <- nb2listw(poly2nb(vancouver_districts))
W
Characteristics of weights list object:
Neighbour list object:
Number of regions: 22
Number of nonzero links: 102
Percentage nonzero weights: 21.07438
Average number of links: 4.636364
Weights style: W
Weights constants summary:
library(knitr)
kable(listw2mat(W))
| 0 | 0.0000000 | 0.2500000 | 0.00 | 0.2500000 | 0.0000000 | 0.0000000 | 0.2500000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2500000 |
| 1 | 0.2000000 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 2 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.5000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.5000000 | 0.0000000 | 0.0000000 |
| 3 | 0.1428571 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1428571 | 0.1428571 | 0.1428571 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1428571 | 0.0000000 | 0.1428571 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1428571 |
| 4 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 5 | 0.0000000 | 0.0000000 | 0.25 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2500000 | 0.0000000 | 0.0000000 | 0.2500000 | 0.2500000 | 0.0000000 |
| 6 | 0.2500000 | 0.2500000 | 0.00 | 0.2500000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2500000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 7 | 0.0000000 | 0.0000000 | 0.00 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.2000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 8 | 0.0000000 | 0.0000000 | 0.00 | 0.1428571 | 0.1428571 | 0.0000000 | 0.0000000 | 0.1428571 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1428571 | 0.1428571 | 0.0000000 | 0.1428571 | 0.0000000 | 0.1428571 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 9 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2500000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2500000 | 0.0000000 | 0.2500000 | 0.2500000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 10 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.3333333 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.3333333 | 0.0000000 | 0.0000000 | 0.3333333 | 0.0000000 | 0.0000000 |
| 11 | 0.0000000 | 0.3333333 | 0.00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.3333333 | 0.0000000 | 0.3333333 | 0.0000000 |
| 12 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.1666667 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1666667 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1666667 | 0.1666667 | 0.1666667 | 0.0000000 | 0.1666667 | 0.0000000 |
| 13 | 0.0000000 | 0.1666667 | 0.00 | 0.1666667 | 0.1666667 | 0.0000000 | 0.1666667 | 0.0000000 | 0.1666667 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1666667 | 0.0000000 | 0.0000000 | 0.0000000 |
| 14 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.3333333 | 0.0000000 | 0.3333333 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.3333333 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 15 | 0.0000000 | 0.0000000 | 0.00 | 0.3333333 | 0.0000000 | 0.0000000 | 0.0000000 | 0.3333333 | 0.3333333 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 16 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.1428571 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1428571 | 0.1428571 | 0.0000000 | 0.1428571 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1428571 | 0.0000000 | 0.1428571 | 0.1428571 | 0.0000000 |
| 17 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.1428571 | 0.0000000 | 0.0000000 | 0.1428571 | 0.1428571 | 0.1428571 | 0.0000000 | 0.0000000 | 0.1428571 | 0.0000000 | 0.1428571 | 0.0000000 | 0.1428571 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 18 | 0.0000000 | 0.1666667 | 0.00 | 0.0000000 | 0.1666667 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1666667 | 0.1666667 | 0.1666667 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.1666667 | 0.0000000 |
| 19 | 0.0000000 | 0.0000000 | 0.25 | 0.0000000 | 0.0000000 | 0.2500000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2500000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2500000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 20 | 0.0000000 | 0.0000000 | 0.00 | 0.0000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.2000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| 21 | 0.5000000 | 0.0000000 | 0.00 | 0.5000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
# Global Spatial Autocorrelation Measures
# Moran Index
crime_matrix<-data.matrix(vancouver_districts@data[,-c(1:3)])
rownames(crime_matrix) <- vancouver_districts@data[,"name"]
crime_avg <- rowMeans(crime_matrix)
moran.test(x=crime_avg, listw=W) # Moran I 0.25908 low
Moran I test under randomisation
data: crime_avg
weights: W
Moran I statistic standard deviate = 3.579, p-value = 0.0001724
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
0.259082478 -0.047619048 0.007343518
moran.mc(x=crime_avg, listw=W, nsim=9999) # another way to calculate Moran I
Monte-Carlo simulation of Moran I
data: crime_avg
weights: W
number of simulations + 1: 10000
statistic = 0.25908, observed rank = 9990, p-value = 0.001
alternative hypothesis: greater
Spatial autocorrelation can be demonstrated by calculating and plotting Moran scatter plots.
This part of code will produce Figure 2.3 Spatial autocorrelation: Moran scatter plot & LISA plot
# Figure: Moran scatter plot
par(mfrow=c(1,1))
moran.plot(crime_avg,W,main="Morans'I = 0.25908")
# local Moran's I
lm <- localmoran(x=rowMeans(crime_matrix), listw=W)
lm
Ii E.Ii Var.Ii Z.Ii Pr(z != E(Ii))
0 0.275539223 -1.815928e-02 0.0833530181 1.01728132 0.309019618
1 0.380398754 -1.896624e-02 0.0654949450 1.56050836 0.118639796
2 0.057384293 -1.160484e-02 0.1198632786 0.19926803 0.842053086
3 0.094620297 -2.806045e-03 0.0061559769 1.24173209 0.214335425
4 0.120192672 -2.555379e-02 0.0876507866 0.49228897 0.622515076
5 0.031630224 -1.088234e-02 0.0503213180 0.18951401 0.849689972
6 0.299841147 -2.292096e-02 0.1046993779 0.99749445 0.318524564
7 1.658086603 -7.875573e-01 0.5889339878 3.18683747 0.001438376
8 0.264829355 -1.146462e-02 0.0249329936 1.74978301 0.080155763
9 0.027375968 -3.333492e-03 0.0155321240 0.24640910 0.805365562
10 -0.032842937 -1.020356e-03 0.0067274775 -0.38797994 0.698030880
11 0.177001646 -6.706573e-03 0.0439665252 0.87612791 0.380960502
12 0.029782633 -7.649295e-03 0.0208746546 0.25907920 0.795574134
13 0.232312909 -2.488060e-02 0.0667192841 0.99571332 0.319389454
14 -0.031032327 -1.830546e-05 0.0001208138 -2.82162828 0.004778052
15 1.452621893 -3.924731e-02 0.2488659181 2.99052912 0.002784946
16 -0.006464167 -1.264458e-03 0.0027782899 -0.09864845 0.921417399
17 0.228230989 -8.276889e-03 0.0180584396 1.75997095 0.078412732
18 0.306399982 -1.739076e-02 0.0469928908 1.49364830 0.135267567
19 -0.058636576 -2.871483e-03 0.0133856377 -0.48199541 0.629809210
20 0.060873706 -1.534573e-03 0.0053934067 0.84978805 0.395442934
21 0.131668221 -2.350952e-02 0.2398988410 0.31682202 0.751378652
attr(,"call")
localmoran(x = rowMeans(crime_matrix), listw = W)
attr(,"class")
[1] "localmoran" "matrix" "array"
attr(,"quadr")
mean median pysal
1 Low-Low Low-Low Low-Low
2 Low-Low Low-Low Low-Low
3 Low-Low Low-Low Low-Low
4 High-High High-High High-High
5 Low-Low Low-Low Low-Low
6 Low-Low Low-High Low-Low
7 Low-Low Low-Low Low-Low
8 High-High High-High High-High
9 High-High High-High High-High
10 High-High High-High High-High
11 Low-High High-High Low-High
12 Low-Low Low-Low Low-Low
13 Low-Low Low-High Low-Low
14 Low-Low Low-Low Low-Low
15 Low-High High-High Low-High
16 High-High High-High High-High
17 High-Low High-High High-Low
18 High-High High-High High-High
19 Low-Low Low-Low Low-Low
20 High-Low High-Low High-Low
21 Low-Low High-Low Low-Low
22 Low-Low Low-Low Low-Low
# Figure: LISA (this part is done by GeoDa)
In order to demonstrate the degree of Spatio-temporal autocorrelation, a hotspot map was plotted, and Spatio-temporal Autocorrelation and Spatio-temporal Partial Autocorrelation were also calculated.
This part of code will produce Figure 2.4 Spatio-temporal autocorrelation
# Figure: Heatmap
library(RColorBrewer)
coul <- colorRampPalette(brewer.pal(8, "Blues"))(25)
heatmap(crime_matrix,Colv = NA, Rowv = NA, scale="column", col = coul, xlab="", ylab="", main="Heatmap")
source("starima_package.R")
Wmat <- listw2mat(W)
# Figure: STACF & STPACF
par(mfrow=c(1,2))
stacf(t(crime_matrix), Wmat, 48)
$stacf
[,1]
[1,] 1.000000000
[2,] 0.681830757
[3,] 0.647997888
[4,] 0.613655677
[5,] 0.591319420
[6,] 0.589197614
[7,] 0.561673083
[8,] 0.559360454
[9,] 0.539942320
[10,] 0.541107172
[11,] 0.550946134
[12,] 0.539604173
[13,] 0.545986456
[14,] 0.508725027
[15,] 0.505354301
[16,] 0.483590278
[17,] 0.467482196
[18,] 0.458671720
[19,] 0.417908384
[20,] 0.414796365
[21,] 0.408419703
[22,] 0.403986718
[23,] 0.413164089
[24,] 0.380080579
[25,] 0.375963874
[26,] 0.332776628
[27,] 0.311078647
[28,] 0.274420198
[29,] 0.245770402
[30,] 0.227242775
[31,] 0.182145340
[32,] 0.196167492
[33,] 0.175316371
[34,] 0.160234458
[35,] 0.157946563
[36,] 0.137681063
[37,] 0.140050629
[38,] 0.107806381
[39,] 0.099920452
[40,] 0.067613495
[41,] 0.052352947
[42,] 0.051229324
[43,] 0.013947257
[44,] 0.019790411
[45,] -0.003293256
[46,] -0.009060873
[47,] -0.002067376
[48,] -0.027668297
[49,] -0.018088693
$ubound
[1] 0.03783701
$lbound
[1] -0.03783701
stpacf(t(crime_matrix), Wmat, 4)
$stpacf
[,1]
[1,] 0.02852303
[2,] -0.11258052
[3,] -0.16137186
[4,] -0.06886563
$ubound
[1] 0.7559289
$lbound
[1] -0.7559289
In this coursework, I tried to use ARIMA and ANN model to predict the Crime in Vancouver. ### ARIMA Model The Autoregressive Integrated Moving Average (ARIMA) model, is a class of models that interprets a given time series based on the historical values of the data. Any non-seasonal time series with a certain pattern that is not random white noise can be modelled and predicted by ARIMA.
ARIMA modelling according to the Box-Jenkins method has the following main steps:
a. Exploratory data analysis
b. Autocorrelation and partial autocorrelation analysis
c. Model Identification
d. Parameter Estimation and Fitting
e. Diagnostic Checking
f. Forecasting/Prediction
The data was first divided into a training set (80%) and a test set (20%). Based on the training set, exploratory data analysis was carried out and ACF, as well as PACF, were calculated.
library(TSA)
library(forecast)
# train set & test set
time_series_train <- time_series[1:140]
time_series_test <- time_series[141:175]
Then try to plot ACF and PACF to show the temporal autocorrelation.
This part of code will produce Figure 3.1 Train set temporal autocorrelation(before differencing)
par(mfrow=c(1,1))
# Figure: time_series & ACF & PACF
tsdisplay(time_series_train,xlab="Time (month)",ylab="Crime Number")
# Figure: lag 1 to 12
lag.plot(time_series_train, lags=12, do.lines=FALSE)
This part of code will produce Figure 3.2 Train set temporal autocorrelation(after differencing)
# Figure: diff=1, time_series & ACF & PACF
time_series_diff1 <- diff(time_series_train, lag=1, differences=1)
tsdisplay(time_series_diff1,xlab="Time (month)",ylab="Crime Number")
Estimate parameters by the figures above that ARIMA(1,1,1) might be suitable. Output the summary of this model. The parameters can also be automatically discriminated by using the auto.arima() function of the R Forecast package.
# estimate by figure 12 & 14: ARIMA(1,1,1)
fit.arima111 <- Arima(time_series_train,order=c(1,1,1))
summary(fit.arima111)
Series: time_series_train
ARIMA(1,1,1)
Coefficients:
ar1 ma1
0.4236 -0.7944
s.e. 0.1138 0.0686
sigma^2 = 48956: log likelihood = -946.92
AIC=1899.84 AICc=1900.02 BIC=1908.64
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -27.07523 218.8768 161.2229 -1.362788 5.985335 0.9104567 -0.0343087
# determine parameters automatically
fit.arima <- auto.arima(time_series_train,trace=T)
ARIMA(2,1,2) with drift : 1902.269
ARIMA(0,1,0) with drift : 1918.171
ARIMA(1,1,0) with drift : 1906.709
ARIMA(0,1,1) with drift : 1905.322
ARIMA(0,1,0) : 1916.275
ARIMA(1,1,2) with drift : 1901.407
ARIMA(0,1,2) with drift : 1904.651
ARIMA(1,1,1) with drift : 1899.518
ARIMA(2,1,1) with drift : 1901.131
ARIMA(2,1,0) with drift : 1908.788
ARIMA(1,1,1) : 1900.019
Best model: ARIMA(1,1,1) with drift
fit.arima # ARIMA(1,1,1)
Series: time_series_train
ARIMA(1,1,1) with drift
Coefficients:
ar1 ma1 drift
0.4480 -0.8413 -10.3674
s.e. 0.1045 0.0582 5.5600
sigma^2 = 48348: log likelihood = -945.61
AIC=1899.22 AICc=1899.52 BIC=1910.96
summary(fit.arima) # model summary
Series: time_series_train
ARIMA(1,1,1) with drift
Coefficients:
ar1 ma1 drift
0.4480 -0.8413 -10.3674
s.e. 0.1045 0.0582 5.5600
sigma^2 = 48348: log likelihood = -945.61
AIC=1899.22 AICc=1899.52 BIC=1910.96
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.7814325 216.718 158.1987 -0.3201685 5.829734 0.8933783 -0.01735723
Based on the ARIMA (1, 1, 1) model future data can be predicted and compared with the test set.
This part of code will produce Figure 3.4 Vancouver Crime Prediction Results by ARIMA
# Figure: forcast & contrast
fit.Ar <- Arima(time_series_train, order=c(1,1,1))
pre.Ar <- Arima(time_series_test, model=fit.Ar)
matplot(cbind(pre.Ar$x, pre.Ar$fitted),ylab="Crime Number", xlab="Month", main="Vancouver Crime, ARIMA",type="l")
legend("bottomleft", title="Data", c("Real","Predict"), lty=c(1, 2), col=c("black", "red"))
This part of code will produce Figure 3.2 Residual distribution
# Figure: Model Residuals
checkresiduals(fit.arima) # residuals 1
Ljung-Box test
data: Residuals from ARIMA(1,1,1) with drift
Q* = 25.269, df = 7, p-value = 0.0006798
Model df: 3. Total lags used: 10
tsdiag(fit.arima) # residuals 2
Calculate the NRMSE by function NRMSE() of “starima_package.R”
source("starima_package.R")
NRMSE_ARIMA <- NRMSE(res=fit.Ar$residuals, obs=time_series_train)
NRMSE_ARIMA
[1] 0.3233448
NRMSE_ANN <- NRMSE(obs=time_series_test, pred=pre.Ar$fitted)
NRMSE_ARIMA
[1] 0.3233448
The Artificial Neural Network (ANN) is a simulation of biological neurons, with a model that resembles the human brain and consists of a large number of processors, which is more effective in predicting spatial and temporal changes in crime and disorder events. The same method was used to divide the data into a train set and a test set, and a multiple input, multiple output ANN model was constructed, using all the data from the previous month to predict all the data for the following month. Load the necessary packages. Same method was used to divide the data into a train set and a test set.
library(nnet)
library(rgdal)
vancouver_districts <- readOGR(dsn="Vancouver-local-area-boundary/local_area_boundary.shp")
OGR data source with driver: ESRI Shapefile
Source: "/Users/zmxu/Desktop/UCL MSc Geospatial Sciences/term2-周2-CEGE0042-Spatial-Temporal Data Analysis and Data Mining (STDM) /Coursework/Vancouver-local-area-boundary/local_area_boundary.shp", layer: "local_area_boundary"
with 22 features
It has 178 fields
crime_matrix<-data.matrix(vancouver_districts@data[,-c(1:3)])
rownames(crime_matrix) <- vancouver_districts@data[,"name"]
X <- t(as.matrix(crime_matrix))
y <- as.matrix(X[-1,])
In order to compare the fitting results for different neighbourhoods of the same method, forecasts were made for Downtown and Marpole using the ANN model.
This part of code will produce Figure 4.1 Prediction results of different regions and models
par(mfrow=c(2,2))
# Figure: ANN plot, Downtown
set.seed(319)
crime.nnet <- nnet(X[1:140, 1:22], y[1:140, 1:22], decay=0.01, linout = TRUE, size=6, maxit = 1000)
# weights: 292
initial value 98191267.052504
iter 10 value 6540269.910012
iter 20 value 6456853.594410
iter 30 value 6440131.798582
iter 40 value 6430070.550671
iter 50 value 6190821.636961
iter 60 value 6052489.109279
iter 70 value 6004548.915123
iter 80 value 5899091.140544
iter 90 value 5862624.532224
iter 100 value 5027160.664144
iter 110 value 5003692.539515
iter 120 value 4985906.377217
iter 130 value 4975529.867173
iter 140 value 4969446.088003
iter 150 value 4512972.755589
iter 160 value 3638734.705448
iter 170 value 3202395.868386
iter 180 value 3196106.607963
iter 190 value 2860389.744495
iter 200 value 2710223.795482
iter 210 value 2645088.896750
iter 220 value 2566461.332465
iter 230 value 2518645.348258
iter 240 value 2481300.267834
iter 250 value 2463085.220910
iter 260 value 2445009.226370
iter 270 value 2444903.421946
iter 280 value 2444177.083635
iter 290 value 2443947.614192
iter 300 value 2443895.634584
iter 310 value 2315530.067978
iter 320 value 2303639.921079
iter 330 value 2303277.730720
iter 340 value 2295821.649836
iter 350 value 2273608.544518
iter 360 value 2271962.360639
iter 370 value 2270235.512886
iter 380 value 2257397.128255
iter 390 value 2253252.990008
iter 400 value 2250791.416826
iter 410 value 2249933.490904
iter 420 value 2249638.232910
iter 430 value 2248877.906796
iter 440 value 2230870.685796
iter 450 value 2229593.998913
iter 460 value 2229113.002047
iter 470 value 2225256.309701
iter 480 value 2218305.646639
iter 490 value 2217427.031341
iter 500 value 2214253.162390
iter 510 value 2207303.080509
iter 520 value 2206463.093291
iter 530 value 2206049.398290
iter 540 value 2205414.956075
iter 550 value 2204432.079485
iter 560 value 2204252.467083
iter 570 value 2204053.463581
iter 580 value 2200483.675570
iter 590 value 2199154.558053
iter 600 value 2187233.184115
iter 610 value 2187075.347308
iter 620 value 2187012.560460
iter 630 value 2186126.042211
iter 640 value 2168417.423138
iter 650 value 2165832.677172
iter 660 value 2163859.292401
iter 670 value 2160344.840179
iter 680 value 2159062.239341
iter 690 value 2158083.554536
iter 700 value 2156895.564240
iter 710 value 2151187.400387
iter 720 value 2150949.078257
iter 730 value 2149830.460678
iter 740 value 2149044.195873
iter 750 value 2148290.243709
iter 760 value 2148026.745897
iter 770 value 2147775.450199
iter 780 value 2147282.219644
iter 790 value 2146793.035256
iter 800 value 2145041.359649
iter 810 value 2144610.219312
iter 820 value 2142710.174795
iter 830 value 2139869.719852
iter 840 value 2118716.372696
iter 850 value 2117959.633411
iter 860 value 2117395.298765
iter 870 value 2116130.975365
iter 880 value 2111760.862415
iter 890 value 2110821.148497
iter 900 value 2110166.708710
iter 910 value 2103226.626991
iter 920 value 2102346.592837
iter 930 value 2101234.971023
iter 940 value 2100466.463991
iter 950 value 2096346.917127
iter 960 value 2090067.478839
iter 970 value 2089640.031363
iter 980 value 2089479.459340
iter 990 value 2088984.722966
iter1000 value 2088214.220523
final value 2088214.220523
stopped after 1000 iterations
crime.pred <- predict(crime.nnet, y[141:174, 1:22])
crime.pred[,8]
Month_142 Month_143 Month_144 Month_145 Month_146 Month_147 Month_148 Month_149 Month_150 Month_151 Month_152 Month_153
627.8965 572.4430 556.2916 604.0940 548.2278 585.4139 572.7730 586.2225 619.7385 699.8101 640.1250 591.3159
Month_154 Month_155 Month_156 Month_157 Month_158 Month_159 Month_160 Month_161 Month_162 Month_163 Month_164 Month_165
645.9375 628.0530 634.6148 748.2268 579.4265 689.9188 609.6612 691.6682 709.2503 739.1280 709.5320 654.8923
Month_166 Month_167 Month_168 Month_169 Month_170 Month_171 Month_172 Month_173 Month_174 Month_175
647.2610 621.1639 548.6760 539.0821 526.9772 667.2648 598.8087 644.9787 615.2200 471.6502
matplot(cbind(y[141:174,8], crime.pred[,8]),ylab="Crime Number", xlab="Month", main="Downtown Crime, ANN", type="l")
NRMSE_ANN <- NRMSE(obs=X[141:174,8],pred=crime.pred[,8])
NRMSE_ANN
[1] 1.393123
# Figure: ANN plot, Marpole
set.seed(319)
crime.nnet <- nnet(X[1:140, 1:22], y[1:140, 1:22], decay=0.01, linout = TRUE, size=6, maxit = 1000)
# weights: 292
initial value 98191267.052504
iter 10 value 6540269.910012
iter 20 value 6456853.594410
iter 30 value 6440131.798582
iter 40 value 6430070.550671
iter 50 value 6190821.636961
iter 60 value 6052489.109279
iter 70 value 6004548.915123
iter 80 value 5899091.140544
iter 90 value 5862624.532224
iter 100 value 5027160.664144
iter 110 value 5003692.539515
iter 120 value 4985906.377217
iter 130 value 4975529.867173
iter 140 value 4969446.088003
iter 150 value 4512972.755589
iter 160 value 3638734.705448
iter 170 value 3202395.868386
iter 180 value 3196106.607963
iter 190 value 2860389.744495
iter 200 value 2710223.795482
iter 210 value 2645088.896750
iter 220 value 2566461.332465
iter 230 value 2518645.348258
iter 240 value 2481300.267834
iter 250 value 2463085.220910
iter 260 value 2445009.226370
iter 270 value 2444903.421946
iter 280 value 2444177.083635
iter 290 value 2443947.614192
iter 300 value 2443895.634584
iter 310 value 2315530.067978
iter 320 value 2303639.921079
iter 330 value 2303277.730720
iter 340 value 2295821.649836
iter 350 value 2273608.544518
iter 360 value 2271962.360639
iter 370 value 2270235.512886
iter 380 value 2257397.128255
iter 390 value 2253252.990008
iter 400 value 2250791.416826
iter 410 value 2249933.490904
iter 420 value 2249638.232910
iter 430 value 2248877.906796
iter 440 value 2230870.685796
iter 450 value 2229593.998913
iter 460 value 2229113.002047
iter 470 value 2225256.309701
iter 480 value 2218305.646639
iter 490 value 2217427.031341
iter 500 value 2214253.162390
iter 510 value 2207303.080509
iter 520 value 2206463.093291
iter 530 value 2206049.398290
iter 540 value 2205414.956075
iter 550 value 2204432.079485
iter 560 value 2204252.467083
iter 570 value 2204053.463581
iter 580 value 2200483.675570
iter 590 value 2199154.558053
iter 600 value 2187233.184115
iter 610 value 2187075.347308
iter 620 value 2187012.560460
iter 630 value 2186126.042211
iter 640 value 2168417.423138
iter 650 value 2165832.677172
iter 660 value 2163859.292401
iter 670 value 2160344.840179
iter 680 value 2159062.239341
iter 690 value 2158083.554536
iter 700 value 2156895.564240
iter 710 value 2151187.400387
iter 720 value 2150949.078257
iter 730 value 2149830.460678
iter 740 value 2149044.195873
iter 750 value 2148290.243709
iter 760 value 2148026.745897
iter 770 value 2147775.450199
iter 780 value 2147282.219644
iter 790 value 2146793.035256
iter 800 value 2145041.359649
iter 810 value 2144610.219312
iter 820 value 2142710.174795
iter 830 value 2139869.719852
iter 840 value 2118716.372696
iter 850 value 2117959.633411
iter 860 value 2117395.298765
iter 870 value 2116130.975365
iter 880 value 2111760.862415
iter 890 value 2110821.148497
iter 900 value 2110166.708710
iter 910 value 2103226.626991
iter 920 value 2102346.592837
iter 930 value 2101234.971023
iter 940 value 2100466.463991
iter 950 value 2096346.917127
iter 960 value 2090067.478839
iter 970 value 2089640.031363
iter 980 value 2089479.459340
iter 990 value 2088984.722966
iter1000 value 2088214.220523
final value 2088214.220523
stopped after 1000 iterations
crime.pred <- predict(crime.nnet, y[141:174, 1:22])
crime.pred[,12]
Month_142 Month_143 Month_144 Month_145 Month_146 Month_147 Month_148 Month_149 Month_150 Month_151 Month_152 Month_153
67.19320 58.45179 55.90578 63.44110 54.63465 60.49647 58.50383 60.62393 65.90722 78.52929 69.12083 61.42682
Month_154 Month_155 Month_156 Month_157 Month_158 Month_159 Month_160 Month_161 Month_162 Month_163 Month_164 Month_165
70.03709 67.21787 68.25223 86.16144 59.55264 76.97008 64.31868 77.24584 80.01738 84.72715 80.06179 71.44868
Month_166 Month_167 Month_168 Month_169 Month_170 Month_171 Month_172 Month_173 Month_174 Month_175
70.24573 66.13191 54.70529 53.19296 51.28482 73.39902 62.60796 69.88595 65.19494 42.56335
matplot(cbind(y[141:174,12], crime.pred[,12]),ylab="Crime Number", xlab="Month", main="Marpole Crime, ANN", type="l")
NRMSE_ANN <- NRMSE(obs=X[141:174,12],pred=crime.pred[,12])
NRMSE_ANN
[1] 1.366676
# create Vancouver Total Crime numbers of each month
X.sum <- as.matrix(rowSums(X))
X.sum <- cbind(X, X.sum)
colnames(X.sum)[ncol(X.sum)] <- as.character(c("Vancouver"))
y.sum <- as.matrix(X.sum[-1,])
# Figure: ANN plot, Vancouver
set.seed(319)
crime.nnet <- nnet(X.sum[1:140, 1:23], y.sum[1:140, 1:23], decay=10, linout = TRUE, size=6, maxit = 100)
# weights: 305
initial value 1172114428.817725
iter 10 value 94346587.189418
iter 20 value 90378454.876764
iter 30 value 84999694.549714
iter 40 value 81913977.734926
iter 50 value 81797779.925512
iter 60 value 81770360.854423
iter 70 value 81322991.452750
iter 80 value 77724097.717811
iter 90 value 56344686.784114
iter 100 value 51716926.157825
final value 51716926.157825
stopped after 100 iterations
crime.pred <- predict(crime.nnet, y.sum[141:174, 1:23])
crime.pred[,23]
Month_142 Month_143 Month_144 Month_145 Month_146 Month_147 Month_148 Month_149 Month_150 Month_151 Month_152 Month_153
2824.585 2279.116 2306.306 2356.334 2162.967 2263.536 2033.330 2455.533 2475.924 2933.015 2630.916 2465.022
Month_154 Month_155 Month_156 Month_157 Month_158 Month_159 Month_160 Month_161 Month_162 Month_163 Month_164 Month_165
2704.824 2726.688 2695.615 2951.409 2346.116 2858.178 2633.371 3045.829 3033.274 3253.546 3094.646 3053.486
Month_166 Month_167 Month_168 Month_169 Month_170 Month_171 Month_172 Month_173 Month_174 Month_175
2770.618 2524.812 2250.414 2207.357 2151.975 2936.008 2414.120 2736.046 2370.799 1845.675
matplot(cbind(y.sum[141:174,23], crime.pred[,23]),ylab="Crime Number", xlab="Month", main="Vancouver Crime, ANN", type="l")
# legend("bottomleft", title="Data", c("Real","Predict"), lty=c(1, 2), col=c("black", "red"))
NRMSE_ANN <- NRMSE(res=crime.nnet$residuals, obs=X.sum[1:140, 1:23])
NRMSE_ANN
[1] 0.115577
NRMSE_ANN <- NRMSE(obs=X.sum[141:174,23],pred=crime.pred[,23])
NRMSE_ANN
[1] 1.004691
matplot(cbind(pre.Ar$x, pre.Ar$fitted),ylab="Crime Number", xlab="Month", main="Vancouver Crime, ARIMA",type="l")
This part of code will produce Figure 3.5 Internal structure of ANN
par(mfrow=c(1,1))
# Figure: Visualisation of internal structure of ANN
library(devtools)
source_url('https://gist.githubusercontent.com/fawda123/7471137/raw/466c1474d0a505ff044412703516c34f1a4684a5/nnet_plot_update.r')
ℹ SHA-1 hash of file is 74c80bd5ddbc17ab3ae5ece9c0ed9beb612e87ef
plot.nnet(crime.nnet)
summary(crime.nnet) # check the weights
a 23-6-23 network with 305 weights
options were - linear output units decay=10
b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1 i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1
-1.20 0.00 -0.03 -0.01 -0.01 -0.06 -0.01 0.06 0.00 0.02 0.01 -0.03 0.01 -0.01 0.03
i15->h1 i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
-0.01 0.00 0.01 -0.02 0.02 0.01 -0.03 0.03 0.00
b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2 i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2
0.00 -0.12 -0.09 -0.11 -0.31 -0.06 -0.12 -0.05 -1.44 -0.39 -0.30 -0.20 -0.13 -0.16 -0.09
i15->h2 i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
-0.28 -0.42 -0.34 -0.44 -0.13 -0.36 -0.18 -0.07 -5.77
b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3 i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3
-10.96 0.00 0.00 -0.01 0.01 0.02 0.00 -0.03 0.00 -0.01 0.00 0.01 0.02 0.00 -0.01
i15->h3 i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
0.01 0.01 0.00 0.01 -0.03 0.00 0.01 -0.01 0.00
b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4 i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4
0.01 0.26 0.26 -0.43 -2.13 -0.03 -0.32 -0.19 -5.87 -4.69 -1.81 -0.39 -0.19 -0.06 0.01
i15->h4 i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
0.72 -3.82 -2.78 -1.06 0.64 -3.44 -1.58 -0.08 -26.99
b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5 i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5
0.00 0.16 0.15 0.11 0.22 0.10 0.16 0.10 1.35 -0.28 0.23 0.34 0.20 0.23 0.04
i15->h5 i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
0.46 0.33 -0.07 0.31 0.22 -0.10 0.02 0.09 4.38
b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6 i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6
0.00 0.01 0.01 0.01 0.03 0.00 0.01 0.00 0.04 0.03 0.02 0.02 0.01 0.01 0.01
i15->h6 i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
0.01 0.02 0.01 0.02 0.00 0.03 0.01 0.01 0.32
b->o1 h1->o1 h2->o1 h3->o1 h4->o1 h5->o1 h6->o1
15.48 28.98 0.07 9.33 0.20 -0.48 11.23
b->o2 h1->o2 h2->o2 h3->o2 h4->o2 h5->o2 h6->o2
8.87 2.33 0.01 36.74 0.09 -1.91 8.46
b->o3 h1->o3 h2->o3 h3->o3 h4->o3 h5->o3 h6->o3
13.29 -0.46 -0.14 64.55 -0.47 0.29 6.14
b->o4 h1->o4 h2->o4 h3->o4 h4->o4 h5->o4 h6->o4
53.04 35.92 -0.35 137.11 -1.15 3.81 -0.50
b->o5 h1->o5 h2->o5 h3->o5 h4->o5 h5->o5 h6->o5
7.30 15.68 0.08 6.90 0.13 -1.67 5.18
b->o6 h1->o6 h2->o6 h3->o6 h4->o6 h5->o6 h6->o6
16.25 35.40 -0.06 36.33 -0.22 -2.99 9.40
b->o7 h1->o7 h2->o7 h3->o7 h4->o7 h5->o7 h6->o7
13.64 13.28 -0.05 13.95 -0.14 -2.08 3.83
b->o8 h1->o8 h2->o8 h3->o8 h4->o8 h5->o8 h6->o8
279.52 248.63 2.19 195.24 2.29 8.80 -3.04
b->o9 h1->o9 h2->o9 h3->o9 h4->o9 h5->o9 h6->o9
86.41 41.64 -0.44 90.34 -1.86 10.31 7.23
b->o10 h1->o10 h2->o10 h3->o10 h4->o10 h5->o10 h6->o10
53.28 50.00 -0.11 82.49 -0.86 -7.09 -8.53
b->o11 h1->o11 h2->o11 h3->o11 h4->o11 h5->o11 h6->o11
32.55 32.52 -0.07 69.92 -0.43 1.58 1.68
b->o12 h1->o12 h2->o12 h3->o12 h4->o12 h5->o12 h6->o12
16.59 12.41 -0.21 52.50 -0.59 2.56 9.15
b->o13 h1->o13 h2->o13 h3->o13 h4->o13 h5->o13 h6->o13
10.07 39.86 0.12 11.85 0.25 -8.22 1.30
b->o14 h1->o14 h2->o14 h3->o14 h4->o14 h5->o14 h6->o14
21.15 21.59 0.07 2.74 0.14 2.36 9.65
b->o15 h1->o15 h2->o15 h3->o15 h4->o15 h5->o15 h6->o15
33.22 48.99 0.39 38.65 0.51 5.51 1.29
b->o16 h1->o16 h2->o16 h3->o16 h4->o16 h5->o16 h6->o16
135.20 85.00 -0.64 108.77 -2.55 1.16 -27.22
b->o17 h1->o17 h2->o17 h3->o17 h4->o17 h5->o17 h6->o17
41.58 68.03 0.16 42.70 0.16 -0.68 11.14
b->o18 h1->o18 h2->o18 h3->o18 h4->o18 h5->o18 h6->o18
85.82 96.47 0.78 -19.72 0.97 -8.19 11.93
b->o19 h1->o19 h2->o19 h3->o19 h4->o19 h5->o19 h6->o19
7.06 39.20 0.32 -15.32 0.74 -13.33 6.44
b->o20 h1->o20 h2->o20 h3->o20 h4->o20 h5->o20 h6->o20
60.86 73.71 0.25 83.56 0.01 18.24 19.57
b->o21 h1->o21 h2->o21 h3->o21 h4->o21 h5->o21 h6->o21
44.02 23.00 -0.33 91.93 -1.06 16.65 14.99
b->o22 h1->o22 h2->o22 h3->o22 h4->o22 h5->o22 h6->o22
22.81 9.59 -0.09 12.59 -0.30 1.69 3.74
b->o23 h1->o23 h2->o23 h3->o23 h4->o23 h5->o23 h6->o23
1052.15 1019.86 1.96 1147.81 -4.18 20.23 99.78
This part of code will produce Figure 3.6 Predictions vs Observed
# Figure: Scatterplot of observed vs predicted values
plot(crime.pred,y.sum[141:174, 1:23], main="ANN predictions vs observed, Vancouver", xlab="Observed", ylab="Predicted")